##Redistricting Simulation Script GA#############
####################################################################
####pkgs
library(redist) # we used version 2.0.2, which was the latest version when we did the analysis
library(foreign)
library(tidyr)
library(dplyr)
library(sp) # basic spatial object handling
library(raster) # raster (pixel) object handling
library(rgdal) # shapefile access
library(rgeos) # geometry operations
library(reshape2)
library(stringi)
library(stringr)
library(splitstackshape)
library(data.table)
library(ggplot2)
library(scales)
##install the arealOverlap pkg , which I created 
#devtools::install_github("https://github.com/jcuriel-unc/arealOverlap2",subdir="arealOverlap")
library(arealOverlap)
library(wru)
#devtools::install_github("https://github.com/jcuriel-unc/zipWRUext",subdir="zipWRUext2")
library(zipWRUext2)
###set directory 
#data_wd <- dirname(rstudioapi::getActiveDocumentContext()$path)
#setwd(data_wd)

##get start time 
start_time_all <- Sys.time()

#### Load in congressional shapefiles 
#source: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
cong_shp <- readOGR("shpfiles/congress", "cb_2018_us_cd116_500k")

###now load the state of interest, in this case GA
ga_cong <- subset(cong_shp, STATEFP == "13" )

###read in the precinct file 
ga_prec <- readOGR("shpfiles/ga_precinct", "2018Precincts")
head(ga_prec@data)

###we will now want to read in a voterfile from the same date as the prec shpfile 
### create a pii version here; or a script that handles this in general 
ga_voter_reg <- read.csv("shpfiles/ga_precinct/GAvoterfilesmall_anon.csv")

### now let's get the names of the relevant fields - already preprocessed this part  
names(ga_voter_reg)

###now let's create values for: white, black, other ; dem, gop, other_party
#ga_voter_reg$race_code <- trimws(ga_voter_reg$race_code)
ga_voter_reg$white <- 0
ga_voter_reg$white[ga_voter_reg$race=="WH"] <- 1
ga_voter_reg$black <- 0
ga_voter_reg$black[ga_voter_reg$race=="BH"] <- 1
ga_voter_reg$hispanic <- 0
ga_voter_reg$hispanic[ga_voter_reg$race=="HP"] <- 1
ga_voter_reg$other_race <- 0
ga_voter_reg$other_race[ga_voter_reg$race!="WH" & ga_voter_reg$race!="BH" & ga_voter_reg$race!="HP"] <- 1

#NOTE: I already dropped 3542 voters who had missing zipcodes
### Let's now run BISG
start_time <- Sys.time()
ga_voter_reg <- zip_wru(ga_voter_reg, state="GEORGIA", type1="census", year1="2010", 
                                zip_col="zipcode", surname_field = "last_name_voterfile")
sum(is.na(ga_voter_reg$pred.whi)) 
end_time <- Sys.time()
##total time 
end_time - start_time ### 4.25 mins

census.ga_county <- get_census_data("b85306550d1fd788ddc045abfa6acf6ba7110abc",state=c("GA"),age=FALSE,sex=FALSE,
                             census.geo="county")

###now let's subset the nc voter reg file 
###now let's do counties; we will want to merge on the info 
county_fips <- read.csv("data/county-fips-codes.csv")
county_fips <- subset(county_fips, state =="Georgia")

ga_voter_reg <- merge(ga_voter_reg,county_fips,by.x="county",by.y="county_name",all.x=T )
sum(is.na(ga_voter_reg$county_fips))###good, nothing is missing 
head(ga_voter_reg$county_fips)
## fixing the county fips strings  
ga_voter_reg$localty <- ga_voter_reg$county
ga_voter_reg$county <- substr(ga_voter_reg$county_fips, 3,5)
colnames(ga_voter_reg)[colnames(ga_voter_reg)=="state"] <- "state_name"
ga_voter_reg$state <- "GA"

## Now subset out 
ga_voter_reg_missing <- subset(ga_voter_reg, is.na(pred.whi)==TRUE)
ga_voter_reg <- subset(ga_voter_reg, is.na(pred.whi)==FALSE)

###drop the columns not of interest from the missing data 
ga_voter_reg_missing <- subset(ga_voter_reg_missing, select=-c(surname.match,pred.whi,pred.bla,pred.his,pred.asi,
                                                               pred.oth))

####let's now run the BISG here for county 
ga_voter_reg_missing <- predict_race(ga_voter_reg_missing, census.geo = "county", census.data = census.ga_county,
                                     age=FALSE, sex=FALSE)
sum(is.na(ga_voter_reg_missing$pred.whi)) #nothing missing; should be able to join back on now 
names(ga_voter_reg_missing) # let's add on the surname.match field 
ga_voter_reg_missing$surname.match <- ""

###now let's bind again 
ga_voter_reg <- rbind(ga_voter_reg, ga_voter_reg_missing)

##assign race categorically 
ga_voter_reg <- race_herfindahl_scores(ga_voter_reg)
### now create new dummy var 
ga_voter_reg$white_plural <- 0
ga_voter_reg$white_plural[ga_voter_reg$plural_race=="white"] <- 1
ga_voter_reg$black_plural <- 0
ga_voter_reg$black_plural[ga_voter_reg$plural_race=="black"] <- 1
ga_voter_reg$hispanic_plural <- 0
ga_voter_reg$hispanic_plural[ga_voter_reg$plural_race=="hispanic"] <- 1
ga_voter_reg$other_plural <- 0
ga_voter_reg$other_plural[ga_voter_reg$plural_race!="white" & ga_voter_reg$plural_race!="black"] <- 1

##check to make sure it matches 
sum(ga_voter_reg$white_plural) - sum(ga_voter_reg$pred.whi)
sum(ga_voter_reg$black_plural) - sum(ga_voter_reg$pred.bla)
sum(ga_voter_reg$hispanic_plural) - sum(ga_voter_reg$pred.his)
sum(ga_voter_reg$other_plural) 

##save data 
saveRDS(ga_voter_reg, "data/ga_voter_reg_cleanedwbisg.rds")
ga_voter_reg <- readRDS("data/ga_voter_reg_cleanedwbisg.rds")

######## Figure S2(b) #############################
ga_voter_reg$eff_num_races <- (1/ga_voter_reg$herf_weight)
summary(ga_voter_reg$eff_num_races)
ga_density_herf <-  ggplot(ga_voter_reg, aes(x=eff_num_races)) +
  geom_density(alpha=0.5, fill="blue") + xlim(1,5) + 
  labs(title="GA BISG Uncertainty",y="Density",x="Effective Number of Races") + theme_minimal()
ga_density_herf
ggsave("plots/ga_herfindahl_density.png" ,plot=ga_density_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )
######################################################



#plot error rate by herfindahl index:

ga_voter_reg$err_b_white <- abs(ga_voter_reg$pred.whi - ga_voter_reg$white)
ga_voter_reg$err_b_black <- abs(ga_voter_reg$pred.bla - ga_voter_reg$black)
ga_voter_reg$err_b_hispanic <- abs(ga_voter_reg$pred.his - ga_voter_reg$hispanic)
ga_voter_reg$err_b_other <- abs(ga_voter_reg$pred.oth - ga_voter_reg$other_race)

ga_voter_reg$err_p_white <- abs(ga_voter_reg$white_plural - ga_voter_reg$white)
ga_voter_reg$err_p_black <- abs(ga_voter_reg$black_plural - ga_voter_reg$black)
ga_voter_reg$err_p_hispanic <- abs(ga_voter_reg$hispanic_plural - ga_voter_reg$hispanic)
ga_voter_reg$err_p_other <- abs(ga_voter_reg$other_plural - ga_voter_reg$other_race)


#collapse errors down to zip code level, for plotting

ga_voter_reg_herf <- ga_voter_reg %>% 
  group_by(zcta5) %>% 
  summarise(eff_num_races=mean(eff_num_races), err_b_white=mean(err_b_white), err_b_black=mean(err_b_black), 
            err_b_hispanic=mean(err_b_hispanic), err_b_other=mean(err_b_other), 
            err_p_white=mean(err_p_white), err_p_black=mean(err_p_black), 
            err_p_hispanic=mean(err_p_hispanic), err_p_other=mean(err_p_other),)

############# Figure S4 ########################### 
err_b_white_herf <-  ggplot(ga_voter_reg_herf, aes(x=eff_num_races, y=err_b_white)) +
  geom_smooth(method=loess) + expand_limits(y = c(0, 0.75)) + xlim(1,3) + theme_minimal() +labs(title="BISG Prob. Sums (PSM) Average Error - White",y="Average Error",x="Effective number of races")
err_b_white_herf
ggsave("plots/ga_err_b_white_herf.png" ,plot=err_b_white_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )

err_b_black_herf <-  ggplot(ga_voter_reg_herf, aes(x=eff_num_races, y=err_b_black)) +
  geom_smooth(method=loess) + expand_limits(y = c(-0.15, 0.5)) + xlim(1,3) + theme_minimal() +labs(title="BISG Prob. Sums (PSM) Average Error - Black",y="Average Error",x="Effective number of races")
err_b_black_herf
ggsave("plots/ga_err_b_black_herf.png" ,plot=err_b_black_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )

err_p_white_herf <-  ggplot(ga_voter_reg_herf, aes(x=eff_num_races, y=err_p_white)) +
  geom_smooth(method=loess) + expand_limits(y = c(0, 0.75)) + xlim(1,3) + theme_minimal() +labs(title="Plurality (PM) Average Error - White",y="Average Error",x="Effective number of races")
err_p_white_herf
ggsave("plots/ga_err_p_white_herf.png" ,plot=err_p_white_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )

err_p_black_herf <-  ggplot(ga_voter_reg_herf, aes(x=eff_num_races, y=err_p_black)) +
  geom_smooth(method=loess) + expand_limits(y = c(-0.15, 0.5)) + xlim(1,3) + theme_minimal() +labs(title="Plurality (PM) Average Error - Black",y="Average Error",x="Effective number of races")
err_p_black_herf
ggsave("plots/ga_err_p_black_herf.png" ,plot=err_p_black_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )
###################################################



########### OVERLAYING THE PRECINCTS ONTO THE CONGRESSIONAL FILE ###########################

###Let's overlay the CDs 
ga_cong <- spTransform(ga_cong, CRS=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))

ga_prec2edits <- readOGR("shpfiles", "ga_precinct_cd_edits")


#merge onto precinct level voter data
ga_voterfile_by_precinct <- readRDS("shpfiles/ga_precinct/ga_voterfile_by_precinct.rds")
class(ga_voterfile_by_precinct)
class(ga_prec2edits)

colnames(ga_voterfile_by_precinct)[colnames(ga_voterfile_by_precinct)=="loc_prec"] <- "loc_prc"

ga_prec2edits <- merge(ga_prec2edits,ga_voterfile_by_precinct, by="loc_prc")



#####now let's set up the simulations ##########################
adj.mat<- gTouches(ga_prec2edits, byid = TRUE)#this worked and will act as the adjacency matrix. 
adj.mat2<-as.matrix(adj.mat)

adj.mat2[1:3,1:3] ##see, it is true/false
#Now create a  0 or 1 matrix from a true or false matrix.

adj.mat3<-adj.mat2*1 # A very easy command. 
diag(adj.mat3) <- 1 #should of course be contiguous with self

### Step 4: Run checks on the matrix 
#how redist checks a matrix ; all should be TRUE 
## Is it square?
squaremat <- (nrow(adj.mat3) == ncol(adj.mat3))
squaremat
## All binary entries?
binary <- ((length(unique(c(adj.mat3))) == 2) &
             (sum(unique(c(adj.mat3)) %in% c(0, 1)) == 2))
binary
## Diagonal elements all 1?
diag <- (sum(diag(adj.mat3)) == nrow(adj.mat3))
diag
## Symmetric?
symmetric <- isSymmetric(adj.mat3)
symmetric

# Step 5: Convert the adjacency matrix to list form
##Note: This will avoid an error present within the redst pkg

#note: so the default adjacency object is a list. 
#Therefore, the script below is from the redist.mcmc code that converts the mat to 
# a list. I should be able to convert it to a list myself 
adjlist <- vector("list", nrow(adj.mat3))
## Loop through rows in matrix
for(i in 1:nrow(adj.mat3)){
  
  ## Extract row
  adjvec <- adj.mat3[,i]
  ## Find elements it is adjacent to
  inds <- which(adjvec == 1)
  ## Remove self-adjacency
  inds <- inds[inds != i]
  ## Zero-index
  inds <- inds - 1
  ## Put in adjlist
  adjlist[[i]] <- inds}
###a check on the list 
minlist <- min(unlist(adjlist))
maxlist <- max(unlist(adjlist))
oneind <- (sum(minlist == 1, maxlist == length(adjlist)) == 2)
zeroind <- (sum(minlist == 0, maxlist == (length(adjlist) - 1)) == 2)

if(oneind){
  ## Zero-index list
  for(i in 1:length(adjlist)){
    adjlist[[i]] <- adjlist[[i]] - 1
  }
}else if(!(oneind | zeroind)){
  ## if neither oneind or zeroind, then stop
  stop("Adjacency list must be one-indexed or zero-indexed")
}
##If this does not stop, all is good 

#list seems fine 
ga_prec2edits$dist_num<-as.numeric(as.character(ga_prec2edits$CD))

## Step 6: Run the simulations 
##running simulation
options(cores=4) #set multi cores 

start_time <- Sys.time()
alg_census2 <-redist.mcmc(adjobj = adjlist, popvec = ga_prec2edits@data$totl_cb, ndists = 14, initcds = ga_prec2edits$dist_num,
                        nsims=10000, nloop = 4, rngseed = 1991,
                        grouppopvec = ga_prec2edits@data$blck_cb,savename = "ga_output/alg_census_store_ga",
                        popcons = .05, contiguitymap = "rooks", verbose = TRUE)
end_time <- Sys.time()
##total time 
end_time - start_time

###saved with the attachment "loop X" at end; saved as RData ; read those in via a loop 
partition_df_ga <- data.frame()
for (i in 1:4) {
  file_name1 <- paste0("ga_output/alg_census_store_ga",sep="_","loop",sep="",i,sep=".","RData")
  load(file_name1) ###will be saved and named as "algout" ; can then grab partitions 
  temp_algloop <- algout$partitions
  temp_algloop <- as.data.frame(temp_algloop)
  if(nrow(partition_df_ga)==0){
    partition_df_ga <- temp_algloop
  }else{
    partition_df_ga <- cbind(partition_df_ga, temp_algloop)
  }
  
}
rm(alg_census,algout,alg_census2)

###now everything is saved into the partition_df

###slimming down 
head(ga_prec2edits@data)
ga_prec2edits_wpart <- ga_prec2edits@data


###run loop to find the demos 
##good, this worked.Now what we'll need to do is run a loop where I can aggregate all of the data by each column. 
#From there, I can look sort all of the rows and such. Then I'll be able to plot everything. 
#Looks like the name of the first col is "1" . The last is of course a col named "10000". IT is the col 10002. 

race_prec_df <- matrix(NA, ncol=14, nrow = 40000)
race_bisg_df <- matrix(NA, ncol=14, nrow = 40000)
race_plural_bisg_df <- matrix(NA, ncol=14, nrow = 40000)

white_prec_df <- matrix(NA, ncol=14, nrow = 40000)
white_bisg_df <- matrix(NA, ncol=14, nrow = 40000)
white_plural_bisg_df <- matrix(NA, ncol=14, nrow = 40000)

temp_ga_masterdf <- cbind(ga_prec2edits_wpart, partition_df_ga)

for(i in 1:40000){
  ###aggregating the vote, given by the values of the ith + 2 column 
  temp_agg <- aggregate(list(temp_ga_masterdf$tot, temp_ga_masterdf$totl_cb, temp_ga_masterdf$whit_cb,
                             temp_ga_masterdf$blck_cb, 
                             temp_ga_masterdf$white, temp_ga_masterdf$black, temp_ga_masterdf$hispanic,
                             temp_ga_masterdf$pred.whi, temp_ga_masterdf$pred.bla, temp_ga_masterdf$pred.his, temp_ga_masterdf$plural_white,
                             temp_ga_masterdf$plural_black, temp_ga_masterdf$plural_hispanic), 
                        by= list(temp_ga_masterdf[,i+60]), FUN=sum, na.rm = T)
    ###with the new temp_agg object, rename the fields 
  colnames(temp_agg) <- c("district", "total", "cb_total", "white_cb", "black_cb", 
                          "white_prec", "black_prec", "hispanic_prec", "bisg_whi",
                          "bisg_bla", "bisg_his", "white_plural", "black_plural", "hispanic_plural")

  temp_agg$black_prec_pct <- temp_agg$black_prec/(temp_agg$total)
  temp_agg$black_bisg_pct <- temp_agg$bisg_bla/(temp_agg$total)
  temp_agg$black_plural_pct <- temp_agg$black_plural/(temp_agg$total)
  
  ###white
  temp_agg$white_prec_pct <- temp_agg$white_prec/(temp_agg$total)
  temp_agg$white_bisg_pct <- temp_agg$bisg_whi/(temp_agg$total)  
  temp_agg$white_plural_pct <- temp_agg$white_plural/(temp_agg$total)  

  temp_store_race2 <- sort(temp_agg$black_prec_pct)
  temp_store_race3 <- sort(temp_agg$black_bisg_pct)
  temp_store_race4 <- sort(temp_agg$black_plural_pct)
  
  ##temp store white 
  temp_store_race2w <- sort(temp_agg$white_prec_pct)
  temp_store_race3w <- sort(temp_agg$white_bisg_pct)
  temp_store_race4w <- sort(temp_agg$white_plural_pct)
  
      #store the values into a df 
  race_prec_df[i,] <- temp_store_race2
  race_bisg_df[i, ] <- temp_store_race3
  race_plural_bisg_df[i, ] <- temp_store_race4
  ###white results 
  white_prec_df[i,] <- temp_store_race2w
  white_bisg_df[i, ] <- temp_store_race3w
  white_plural_bisg_df[i, ] <- temp_store_race4w
 
}


race_prec_df_ga <- as.data.frame(race_prec_df)
race_bisg_df_ga <- as.data.frame(race_bisg_df)
race_plural_bisg_df_ga <- as.data.frame(race_plural_bisg_df)
white_prec_df_ga <- as.data.frame(white_prec_df)
white_bisg_df_ga <- as.data.frame(white_bisg_df)
white_plural_bisg_df_ga <- as.data.frame(white_plural_bisg_df)

###save the output 
saveRDS(race_prec_df_ga, "ga_output/mpsa_least2most_blackprec_dist_ga.rds")
saveRDS(race_bisg_df_ga, "ga_output/mpsa_least2most_blackbisg_dist_ga.rds")
saveRDS(race_plural_bisg_df_ga, "ga_output/mpsa_least2most_blackbisg_plural_dist_ga.rds")
saveRDS(white_prec_df_ga, "ga_output/mpsa_least2most_whiteprec_dist_ga.rds")
saveRDS(white_bisg_df_ga, "ga_output/mpsa_least2most_whitebisg_dist_ga.rds")
saveRDS(white_plural_bisg_df_ga, "ga_output/mpsa_least2most_whitebisg_plural_dist_ga.rds")


###read in the output 
#race_prec_df_ga <- readRDS("ga_output/mpsa_least2most_blackprec_dist_ga.rds")
#race_bisg_df_ga <- readRDS("ga_output/mpsa_least2most_blackbisg_dist_ga.rds")
#race_plural_bisg_df_ga <- readRDS("ga_output/mpsa_least2most_blackbisg_plural_dist_ga.rds")
#white_prec_df_ga <- readRDS("ga_output/mpsa_least2most_whiteprec_dist_ga.rds")
#white_bisg_df_ga <- readRDS("ga_output/mpsa_least2most_whitebisg_dist_ga.rds")
#white_plural_bisg_df_ga <- readRDS("ga_output/mpsa_least2most_whitebisg_plural_dist_ga.rds")


####Now let's plot the data 
apply(white_prec_df_ga, 2,quantile, probs=c(0.025,.5,.975), na.rm=T )
apply(white_bisg_df_ga, 2,quantile, probs=c(0.025,.5,.975), na.rm=T )
apply(white_plural_bisg_df_ga, 2,quantile, probs=c(0.025,.5,.975), na.rm=T )

###let's get abs diff of matrices 
white_prec_bisg_diff_df <- apply( abs(white_prec_df_ga-white_bisg_df_ga), 2,quantile, probs=c(0.025,.5,.975), na.rm=T )*100
white_prec_plural_diff_df <-apply( abs(white_prec_df_ga-white_plural_bisg_df_ga), 2,quantile, probs=c(0.025,.5,.975), na.rm=T )*100
black_prec_bisg_diff_df <-apply( abs(race_prec_df_ga-race_bisg_df_ga), 2,quantile, probs=c(0.025,.5,.975), na.rm=T )*100
black_prec_plural_diff_df <-apply( abs(race_prec_df_ga-race_plural_bisg_df_ga), 2,quantile, probs=c(0.025,.5,.975), na.rm=T )*100

###Let's now create the plots

#lowest to highest, as per ordering (V1, V2, etc...)
pctblackCD <- data.frame(V1=0.075, V2=0.113, V3=0.146, V4=0.183, V5=0.224, V6=0.264, V7=0.275, V8=0.305, V9=0.325, V10=0.367, V11=0.532, V12=0.571, V13=0.619, V14=0.642)
pctblackCD_IDs <- melt(as.matrix(pctblackCD))
#reshape original data and error bars
black_prec_bisg_diff_df <- rbind(black_prec_bisg_diff_df,pctblackCD)
black_prec_bisg_diff_df_rshp <- melt(as.matrix(black_prec_bisg_diff_df))
#merge to pct black ids
black_prec_bisg_diff_df_rshp <- merge(black_prec_bisg_diff_df_rshp,pctblackCD_IDs, by=c("Var2") )
#reshape again
black_prec_bisg_diff_df_rshp <- reshape(black_prec_bisg_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
black_prec_bisg_diff_df_rshp <- black_prec_bisg_diff_df_rshp[order(black_prec_bisg_diff_df_rshp$value.y),]


#now plurality difference
#reshape original data and error bars
black_prec_plural_diff_df <- rbind(black_prec_plural_diff_df,pctblackCD)
black_prec_plural_diff_df_rshp <- melt(as.matrix(black_prec_plural_diff_df))
#merge to pct black ids
black_prec_plural_diff_df_rshp <- merge(black_prec_plural_diff_df_rshp,pctblackCD_IDs, by=c("Var2") )
#reshape again
black_prec_plural_diff_df_rshp <- reshape(black_prec_plural_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
black_prec_plural_diff_df_rshp <- black_prec_plural_diff_df_rshp[order(black_prec_plural_diff_df_rshp$value.y),]


########################## Figure 2 d ##############################################
jpeg("plots/black_bisg_error_sensitivity_ga.jpg", res=600, height = 6, width = 9, units = "in")
###bisg diff
plot(black_prec_bisg_diff_df_rshp$value.y, black_prec_bisg_diff_df_rshp$`value.x.50%`,col="blue", pch=19,
     ylab="% Point Diff in Black Composition", xlab="Congressional District Share Black", ylim=c(0,20), xlim=c(0,0.7), xaxt="n")
axis(1, at = c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7))
arrows(black_prec_bisg_diff_df_rshp$value.y, black_prec_bisg_diff_df_rshp$`value.x.2.5%`, black_prec_bisg_diff_df_rshp$value.y, black_prec_bisg_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="blue")
#plurality differences
points(black_prec_plural_diff_df_rshp$value.y, black_prec_plural_diff_df_rshp$`value.x.50%`,col="red", pch=19)
arrows(black_prec_plural_diff_df_rshp$value.y, black_prec_plural_diff_df_rshp$`value.x.2.5%`, black_prec_plural_diff_df_rshp$value.y, black_prec_plural_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="red")

legend("topleft", bty="n",c("Real - BISG Prob. Sums","Real - BISG Plurality"), 
       fill = c("blue","red"), col=c("blue","red"))
dev.off()
############################################################


#lowest to highest WHITE pct now, as per ordering (V1, V2, etc...)
pctwhiteCD <- data.frame(V1=0.206, V2=0.21, V3=0.291, V4=0.381, V5=0.386, V6=0.529, V7=0.556, V8=0.569, V9=0.573, V10=0.613, V11=0.62, V12=0.625, V13=0.729, V14=0.749)
pctwhiteCD_IDs <- melt(as.matrix(pctwhiteCD))
#reshape original data and error bars
white_prec_bisg_diff_df <- rbind(white_prec_bisg_diff_df,pctwhiteCD)
white_prec_bisg_diff_df_rshp <- melt(as.matrix(white_prec_bisg_diff_df))
#merge to pct black ids
white_prec_bisg_diff_df_rshp <- merge(white_prec_bisg_diff_df_rshp,pctwhiteCD_IDs, by=c("Var2") )
#reshape again
white_prec_bisg_diff_df_rshp <- reshape(white_prec_bisg_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
white_prec_bisg_diff_df_rshp <- white_prec_bisg_diff_df_rshp[order(white_prec_bisg_diff_df_rshp$value.y),]


#now plurality difference
#reshape original data and error bars
white_prec_plural_diff_df <- rbind(white_prec_plural_diff_df,pctwhiteCD)
white_prec_plural_diff_df_rshp <- melt(as.matrix(white_prec_plural_diff_df))
#merge to pct black ids
white_prec_plural_diff_df_rshp <- merge(white_prec_plural_diff_df_rshp,pctwhiteCD_IDs, by=c("Var2") )
#reshape again
white_prec_plural_diff_df_rshp <- reshape(white_prec_plural_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
white_prec_plural_diff_df_rshp <- white_prec_plural_diff_df_rshp[order(white_prec_plural_diff_df_rshp$value.y),]


########################## Figure 2 b ##############################################
jpeg("plots/white_bisg_error_sensitivity_ga.jpg", res=600, height = 6, width = 9, units = "in")
###bisg diff
plot(white_prec_bisg_diff_df_rshp$value.y,white_prec_bisg_diff_df_rshp$`value.x.50%`,col="blue", pch=19,
     ylab="% Point Diff in White Composition", xlab="Congressional District Share White", ylim=c(0,25), xlim=c(0.2,0.8), xaxt="n")
axis(1, at = c(0.2,0.3,0.4,0.5,0.6,0.7,0.8))
arrows(white_prec_bisg_diff_df_rshp$value.y, white_prec_bisg_diff_df_rshp$`value.x.2.5%`, white_prec_bisg_diff_df_rshp$value.y, white_prec_bisg_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="blue")
#plurality differences
points(white_prec_plural_diff_df_rshp$value.y, white_prec_plural_diff_df_rshp$`value.x.50%`,col="red", pch=19)
arrows(white_prec_plural_diff_df_rshp$value.y, white_prec_plural_diff_df_rshp$`value.x.2.5%`, white_prec_plural_diff_df_rshp$value.y, white_prec_plural_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="red")

legend("topleft", bty="n",c("Real - BISG Prob. Sums","Real - BISG Plurality"), 
       fill = c("blue","red"), col=c("blue","red"))
dev.off()
######################################################################################

####Let's get the ga data cleaned 
ga_prec2edits$prec_white_bisg_diff <- 
  abs( (ga_prec2edits$white/ga_prec2edits$tot) - (ga_prec2edits$pred.whi/ga_prec2edits$tot))
ga_prec2edits$prec_black_bisg_diff <- 
  abs( (ga_prec2edits$black/ga_prec2edits$tot) - (ga_prec2edits$pred.bla/ga_prec2edits$tot))
ga_prec2edits$prec_white_bisgp_diff <- 
  abs( (ga_prec2edits$white/ga_prec2edits$tot) - (ga_prec2edits$plural_white/ga_prec2edits$tot))
ga_prec2edits$prec_black_bisgp_diff <- 
  abs( (ga_prec2edits$white/ga_prec2edits$tot) - (ga_prec2edits$plural_black/ga_prec2edits$tot))
###good. Now let's get quantile diff 

###prec to bisg 
quantile(ga_prec2edits$prec_white_bisg_diff, seq(0,1,by=0.05), na.rm=T)
quantile(ga_prec2edits$prec_black_bisg_diff, seq(0,1,by=0.05), na.rm=T)


####Now let's do the density plot, as that seems better display 
ga_prec2edits_df <- ga_prec2edits@data
ga_prec2edits_df_long <- gather(ga_prec2edits_df, key="method", value="pct_diff", 
                                prec_white_bisg_diff:prec_black_bisgp_diff)

white_density <- ggplot(data=subset(ga_prec2edits_df_long, method=="prec_white_bisg_diff" | 
                                      method=="prec_white_bisgp_diff"),aes(x=pct_diff*100,fill=method)) +
  geom_density(alpha=0.3) + theme_minimal() + xlim(0,100) +
  theme(legend.text=element_text(size=8),legend.title = element_text(size=10))+
  scale_fill_discrete(name="Method", labels=c("Prob. Sums (PSM)","Plurality (PM)")) +
  labs(title="White Margin of Error",y="Density",x="% Difference from Reported Race", fill="Method")
white_density

black_density <- ggplot(data=subset(ga_prec2edits_df_long, method=="prec_black_bisg_diff" | 
                                      method=="prec_black_bisgp_diff"),aes(x=pct_diff*100,fill=method)) +
  geom_density(alpha=0.3) + theme_minimal() + xlim(0,100) +
  theme(legend.text=element_text(size=8),legend.title = element_text(size=10))+
  scale_fill_discrete(name="Method", labels=c("Prob. Sums (PSM)","Plurality (PM)")) +
  labs(title="Black Margin of Error",y="Density",x="% Difference from Reported Race", fill="Method")
black_density

####export plots; Figures 1 b and d  
ggsave("plots/black_density_error_plot_ga.png" ,plot=black_density, scale=1,width=9,height=6,units = c("in"),dpi=600 )
ggsave("plots/white_density_error_plot_ga.png" ,plot=white_density, scale=1,width=9,height=6,units = c("in"),dpi=600 )


##ending time 
end_time_all <- Sys.time()

##total time 
end_time_all-start_time_all 

